/*
 * Unidata Platform
 * Copyright (c) 2013-2020, UNIDATA LLC, All rights reserved.
 *
 * Commercial License
 * This version of Unidata Platform is licensed commercially and is the appropriate option for the vast majority of use cases.
 *
 * Please see the Unidata Licensing page at: https://unidata-platform.com/license/
 * For clarification or additional options, please contact: info@unidata-platform.com
 * -------
 * Disclaimer:
 * -------
 * THIS SOFTWARE IS DISTRIBUTED "AS-IS" WITHOUT ANY WARRANTIES, CONDITIONS AND
 * REPRESENTATIONS WHETHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE
 * IMPLIED WARRANTIES AND CONDITIONS OF MERCHANTABILITY, MERCHANTABLE QUALITY,
 * FITNESS FOR A PARTICULAR PURPOSE, DURABILITY, NON-INFRINGEMENT, PERFORMANCE AND
 * THOSE ARISING BY STATUTE OR FROM CUSTOM OR USAGE OF TRADE OR COURSE OF DEALING.
 */
package org.unidata.mdm.dq.core.service.impl.function.system.string;

import org.apache.commons.lang3.StringUtils;
import org.unidata.mdm.dq.core.context.CleanseFunctionContext;
import org.unidata.mdm.dq.core.dto.CleanseFunctionResult;
import org.unidata.mdm.dq.core.service.impl.function.system.AbstractSystemCleanseFunction;
import org.unidata.mdm.dq.core.type.cleanse.CleanseFunctionConfiguration;
import org.unidata.mdm.dq.core.type.cleanse.CleanseFunctionExecutionScope;
import org.unidata.mdm.dq.core.type.cleanse.CleanseFunctionInputParam;
import org.unidata.mdm.dq.core.type.cleanse.CleanseFunctionOutputParam;
import org.unidata.mdm.dq.core.type.cleanse.CleanseFunctionPortFilteringMode;
import org.unidata.mdm.dq.core.type.cleanse.CleanseFunctionPortInputType;
import org.unidata.mdm.dq.core.type.cleanse.CleanseFunctionPortValueType;
import org.unidata.mdm.dq.core.type.constant.CleanseConstants;
import org.unidata.mdm.system.util.TextUtils;

/**
 * Cleanup 'noise' from the input string.
 * @author ilya.bykov
 *
 */
public class CleanupNoise extends AbstractSystemCleanseFunction {
    /**
     * Display name code.
     */
    private static final String FUNCTION_DISPLAY_NAME = "app.dq.functions.system.string.cleanup.noise.display.name";
    /**
     * Description code.
     */
    private static final String FUNCTION_DESCRIPTION = "app.dq.functions.system.string.cleanup.noise.decsription";
    /**
     * IP1 name code.
     */
    private static final String INPUT_PORT_1_NAME = "app.dq.functions.system.string.cleanup.noise.input.port1.name";
    /**
     * IP1 description code.
     */
    private static final String INPUT_PORT_1_DESCRIPTION = "app.dq.functions.system.string.cleanup.noise.input.port1.decsription";
    /**
     * OP1 name code.
     */
    private static final String OUTPUT_PORT_1_NAME = "app.dq.functions.system.string.cleanup.noise.output.port1.name";
    /**
     * OP1 description code.
     */
    private static final String OUTPUT_PORT_1_DESCRIPTION = "app.dq.functions.system.string.cleanup.noise.output.port1.decsription";
    /**
     * This function configuration.
     */
    private static final CleanseFunctionConfiguration CONFIGURATION
        = CleanseFunctionConfiguration.configuration()
            .supports(CleanseFunctionExecutionScope.LOCAL)
            .input(CleanseFunctionConfiguration.port()
                    .name(CleanseConstants.INPUT_PORT_1)
                    .displayName(() -> TextUtils.getText(INPUT_PORT_1_NAME))
                    .description(() -> TextUtils.getText(INPUT_PORT_1_DESCRIPTION))
                    .filteringMode(CleanseFunctionPortFilteringMode.MODE_ONCE)
                    .inputTypes(CleanseFunctionPortInputType.SIMPLE)
                    .valueTypes(CleanseFunctionPortValueType.STRING)
                    .required(true)
                    .build())
            .output(CleanseFunctionConfiguration.port()
                    .name(CleanseConstants.OUTPUT_PORT_1)
                    .displayName(() -> TextUtils.getText(OUTPUT_PORT_1_NAME))
                    .description(() -> TextUtils.getText(OUTPUT_PORT_1_DESCRIPTION))
                    .filteringMode(CleanseFunctionPortFilteringMode.MODE_ALL)
                    .inputTypes(CleanseFunctionPortInputType.SIMPLE)
                    .valueTypes(CleanseFunctionPortValueType.STRING)
                    .required(true)
                    .build())
            .build();
    /**
     * Constructor.
     */
    public CleanupNoise() {
        super("CleanupNoise", () -> TextUtils.getText(FUNCTION_DISPLAY_NAME), () -> TextUtils.getText(FUNCTION_DESCRIPTION));
    }
    /**
     * {@inheritDoc}
     */
    @Override
    public CleanseFunctionConfiguration configure() {
        return CONFIGURATION;
    }
    /**
     * {@inheritDoc}
     */
    @Override
    public CleanseFunctionResult execute(CleanseFunctionContext ctx) {

        CleanseFunctionResult output = new CleanseFunctionResult();
        CleanseFunctionInputParam param1 = ctx.getInputParam(CleanseConstants.INPUT_PORT_1);
        if (param1 == null || param1.isEmpty()) {
            output.putOutputParam(CleanseFunctionOutputParam.of(CleanseConstants.OUTPUT_PORT_1, (String) null));
            return output;
        }

        if (!ensureAllSingletons(ctx, output, param1)) {
            return output;
        }

        String input = param1.toSingletonValue();
        String result = cleanup(input);

        output.putOutputParam(CleanseFunctionOutputParam.of(CleanseConstants.OUTPUT_PORT_1, result));
        return output;
    }
    /**
     * Cleanup string.</br>
     * Method makes the following:
     * <ul>
     *     <li>Trim whitespaces.</li>
     *     <li>Removes all repeating whitespaces.</li>
     *     <li>Capitalize first letter of every sentence.</li>
     *     <li>Converts to lower case all other characters if they are:
     *         <ul>
     *             <li>Not surrounded by quotes</li>
     *             <li>Not first letter in a sentence</li>
     *         </ul
     *     </li>
     * </ul>
     * @param input input string.
     * @return output string.
     */
    private static String cleanup(String input) {

        if (input == null) {
            return null;
        }

        input = StringUtils.normalizeSpace(StringUtils.trim(input));
        char[] chars = input.toCharArray();
        boolean isQuotes = false;
        boolean isNew = false;
        for (int i = 0; i < chars.length; i++) {

            if (i != 0 && Character.isUpperCase(chars[i]) && !isQuotes) {
                chars[i] = Character.toLowerCase(chars[i]);
            }

            if (chars[i] == '.' || i == 0) {
                isNew = true;
            }

            if (isNew && chars[i] != ' ' && chars[i] != '.') {
                chars[i] = Character.toTitleCase(chars[i]);
                isNew = false;
            }
            // || chars[i] == '“'
            if (chars[i] == '\"'
             || chars[i] == '\''
             || chars[i] == '„'
             || chars[i] == '“'
             || chars[i] == '”'
             || chars[i] == '«'
             || chars[i] == '»') {
                isQuotes = !isQuotes;
            }
        }

        return String.valueOf(chars);
    }
}
